library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
path = "/Users/tina/Documents/School/Capstone/Dallas Files/"
#vacant_parcels <- read.csv(paste0(path, "CPAL/DCAD_parcels_vac.csv"), stringsAsFactors = F)
#abatement_exempt <- read.csv(paste0(path, "DCAD2019/abatement_exempt.csv"), stringsAsFactors = F)
account_apprl_year <- read.csv(paste0(path, "DCAD2020_CURRENT/account_apprl_year.csv"), nrows=50000, stringsAsFactors = F)
account_info <- read.csv(paste0(path, "DCAD2020_CURRENT/account_info.csv"), nrows=50000, stringsAsFactors = F)
#account_tif <- read.csv(paste0(path, "DCAD2019/account_tif_small.csv"), stringsAsFactors = F)
account_exempt_value <- read.csv(paste0(path, "DCAD2020_CURRENT/acct_exempt_value.csv"), nrows=10000, stringsAsFactors = F)
#account_exempt_value$ACCOUNT_NUM <- as.character(account_exempt_value$ACCOUNT_NUM)
#applied_std_exempt <- read.csv(paste0(path, "DCAD2019/applied_std_exempt_small.csv"), stringsAsFactors = F)
#com_detail <- read.csv(paste0(path, "DCAD2019/com_detail_small.csv"), stringsAsFactors = F)
#land <- read.csv(paste0(path, "DCAD2019/land_small.csv"), stringsAsFactors = F)
#land$ACCOUNT_NUM <- as.character(land$ACCOUNT_NUM)
#multi_owner <- read.csv(paste0(path, "DCAD2019/multi_owner_small.csv"), stringsAsFactors = F)
#res_addl <- read.csv(paste0(path, "DCAD2019/res_addl_small.csv"), stringsAsFactors = F)
#res_detail <- read.csv(paste0(path, "DCAD2019/res_detail_small.csv"), stringsAsFactors = F)
#taxable_object <- read.csv(paste0(path, "DCAD2019/taxable_object_small.csv"), stringsAsFactors = F)
#taxable_object$ACCOUNT_NUM <- as.character(taxable_object$ACCOUNT_NUM)
#taxable_object$TAX_OBJ_ID <- as.character(taxable_object$TAX_OBJ_ID)
# Join all the files together
# df = #vacant_parcels %>%
# #left_join(account_apprl_year) %>%
# account_apprl_year %>%
# left_join(account_info) %>%
# left_join(account_tif) %>%
# left_join(account_exempt_value) %>%
# left_join(applied_std_exempt) %>%
# left_join(com_detail) %>%
# left_join(land) %>%
# left_join(multi_owner) %>%
# left_join(res_addl) %>%
# left_join(res_detail) %>%
# left_join(taxable_object)
df = account_info %>%
left_join(account_apprl_year)
## Joining, by = c("ACCOUNT_NUM", "APPRAISAL_YR", "DIVISION_CD", "GIS_PARCEL_ID")
summary(df$IMPR_VAL)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0 0 71720 229377 180875 41386600 47341
df_gg = df %>%
filter(!is.na(IMPR_VAL)) %>%
filter(IMPR_VAL < 37794550) %>%
group_by(GIS_PARCEL_ID) %>%
summarise_all(max) %>% #deduplicate by parcel id and keep the max of all other columns
mutate(lt_1k_impr_val = ifelse(IMPR_VAL < 1000, "yes", "no"))
ggplot(df_gg, aes(x=lt_1k_impr_val, y = ..count.., fill=lt_1k_impr_val)) +
geom_bar() +
ggtitle("Count of Parcels <$1000 Improvement Value")
# bar graph of things less than 2k improvement
df = account_info %>%
left_join(account_apprl_year)
## Joining, by = c("ACCOUNT_NUM", "APPRAISAL_YR", "DIVISION_CD", "GIS_PARCEL_ID")
summary(factor(df$LMA))
## UNASSIGNED 2DSK10 2DSK13 1DSDE1 1DSJ10 2DSK09 2DSK11
## 45342 317 309 283 280 191 174
## 2DSK15 2DSK9A 1DSI10 2DSK14 1DSJ7G 1DSJ11 1DSJ7A
## 149 139 125 90 85 82 78
## 1DSOL1 1DSJ7F 2DSL10 1DSJ09 1DSI12 1DSJ7B 1DSOL7
## 78 77 71 68 64 60 54
## 1DSJ7E 2DSK16 1DSI09 1DSOL8 1DSOL6 1DSI11 2DSK08
## 51 51 46 46 43 41 40
## 1DSOL4 2DSL6B 2DSL7A 1DSOL2 1DSK07 1DSCEX 1DSOL3
## 38 38 37 35 33 31 28
## 1DSCE9 1DSH6B 1DSJ6A 2WS006 1OS005 1DSJ7D 1ISA08
## 26 25 25 24 22 21 20
## 1DSOL5 1DSK7A 2DSL09 2LS008 1DSI7A 1DSK6A 1DSJ06
## 19 18 18 18 16 16 15
## 2SS003 1DSH9C 2DSQ03 3DSI02 1DSK04 2DSL6A 2DSM07
## 15 14 14 14 13 13 13
## 3MSF01 1DSCE8 1DSH06 2DSCL1 2DSL07 2DSL12 2LS011
## 13 12 12 12 12 12 12
## 2PS001 3DSL01 1DSG04 1DSJ12 2DSQ04 2WS005 1DSJ5A
## 12 12 11 11 11 11 10
## 2DSK17 2DSN10 2DSR14 2PS002 3MSD05 1DSI05 3GSE02
## 10 10 10 10 10 9 9
## 3GSE03 3MSB02 3MSG02 1DSH16 1DSH9A 2DSM08 2DSP12
## 9 9 9 8 8 8 8
## 3DSI01 3DSM01 1DSJ7C 1DSK05 2DSK23 2DSL06 2DSN07
## 8 8 7 7 7 7 7
## 2DSN09 2LS007 2LS010 2SS002 2WS004 3DSJ03 3MSE01
## 7 7 7 7 7 7 7
## 3YSB05 (Other)
## 7 550
summary(df$IMPR_VAL)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0 0 71720 229377 180875 41386600 47341
df_gg = df %>%
filter(!is.na(LMA)) %>%
filter(IMPR_VAL < 37794550) %>%
mutate(lt_1k_impr_val = ifelse(IMPR_VAL < 1000, "yes", "no"))
ggplot(df_gg, aes(x=LMA, y = ..count.., fill=lt_1k_impr_val)) +
geom_bar() +
ggtitle("Count of Parcels <$1000 Improvement Value by LMA")
df_gg %>%
count(lt_1k_impr_val = factor(lt_1k_impr_val)) %>%
mutate(pct = prop.table(n)) %>%
ggplot(aes(x = lt_1k_impr_val, y = pct, fill = lt_1k_impr_val, label = paste(scales::percent(pct), "\n ( n=", n, ")"))) +
geom_col(position = 'dodge') +
geom_text(position = position_dodge(width = .9), # move to center of bars
vjust = .5, # nudge above top of bar
size = 3) +
scale_y_continuous(labels = scales::percent) +
ggtitle("Percent Parcels Less Than $1k Improvement Value")
summary(factor(df$NBHD_CD))
## UNASSIGNED 1DSA04 1DSA06 1DSD17 1DSA02 1DSD11 1DSG08
## 4658 1308 1251 750 557 491 428
## 1DSA07 1DSI01 1DS907 1DSJ22 1DSG06 1DSJ03 1DSM15
## 396 377 349 289 276 247 207
## 4DSG70 1DSD08 4DSE13 1DSJ19 1DSD14 1DSD04 4DSG15
## 203 196 190 174 168 167 159
## 1DSM26 1DSJ11 1DSJ12 1DSA05 1DSG11 1DSK01 4DSD39
## 154 148 148 146 142 141 141
## 4DSG85 4DSD89 1DSB01 4DSE18 1DSM17 1DSD12 1DSQ01
## 139 136 133 130 129 128 127
## 1DSJ21 1DSJ06 1DSJ08 5DSD06 1DSM12 4DSE27 1DSD16
## 125 123 123 122 121 116 108
## 1DSG09 4DSG44 4DSG40 1DS909 1DSJ24 4DSG45 1DSJ1H
## 108 108 107 106 106 104 101
## 1DSM21 1DSA01 1DSD03 4DSG11 4DSD79 4DSD13 1DSA10
## 101 99 99 98 96 95 94
## 4DSA14 4DSG08 1DSM19 5DSG05 5DSF01 1DSA09 2RSI01
## 90 90 88 87 86 84 84
## 4DSD42 4DSD44 4DSF10 1DSJ20 4DSF11 4DSI01 4DSM02
## 84 80 78 77 76 76 76
## 1DSM11 4DSD40 4DSE31 1DSF01 4DSD88 5DSJ13 1DSM04
## 75 74 73 72 72 72 71
## 4DSE29 4DSI27 4WSI28 4DSD81 1DSM13 4DSD11 4DSD36
## 70 70 70 69 67 67 67
## 1DSF03 1DSJ18 1DSM20 4DSG72 4DSH16 5IS108 4DSG47
## 65 65 65 65 64 64 63
## 4WSD06 1DSJ27 3MSG21 5DS311 3DSJ13 3GSH07 4DSD38
## 63 62 62 62 61 61 61
## 4DSD86 (Other)
## 61 29868
summary(df$IMPR_VAL)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0 0 71720 229377 180875 41386600 47341
df_gg = df %>%
filter(!is.na(NBHD_CD)) %>%
filter(IMPR_VAL < 37794550) %>%
mutate(lt_1k_impr_val = ifelse(IMPR_VAL < 1000, "yes", "no"))
ggplot(df_gg, aes(x=NBHD_CD, y = ..count.., fill=lt_1k_impr_val)) +
geom_bar() +
theme(axis.text.x = element_text(angle = 90)) +
ggtitle("Count of Parcels <$1000 Improvement Value by NBHD_CD")
# account_apprl_year X account_exempt
# contains auxiliary data for each appraisal account in Account_Apprl_Year.
df = account_exempt_value %>%
left_join(account_apprl_year)
## Joining, by = c("ACCOUNT_NUM", "APPRAISAL_YR")
summary(factor(df$CITY_APPLD_VAL))
## 1e+05 12000 0 5000 10000 7500 50000 100 56968 135000
## 780 767 288 226 151 121 24 9 7 7
## 160000 187500 312500 425000 525000 66000 99000 271960 11806 51120
## 7 7 7 7 7 6 6 6 5 5
## 71088 375000 60 8550 39102 47112 60000 81000 130000 140000
## 5 5 4 4 4 4 4 4 4 4
## 145200 150000 154000 166250 180000 325000 6408 10025 10245 12056
## 4 4 4 4 4 4 3 3 3 3
## 14412 14642 15157 17000 18464 19074 19296 24134 25313 26372
## 3 3 3 3 3 3 3 3 3 3
## 31092 37000 45144 54000 60912 67000 70000 74000 75000 76384
## 3 3 3 3 3 3 3 3 3 3
## 78000 78896 80000 91140 95000 96000 98000 101000 104000 105000
## 3 3 3 3 3 3 3 3 3 3
## 105024 120000 125000 134000 141000 158000 168000 170000 185000 188000
## 3 3 3 3 3 3 3 3 3 3
## 198000 2e+05 205000 208000 220000 224000 240000 246000 3e+05 340000
## 3 3 3 3 3 3 3 3 3 3
## 342500 382500 1700 5045 5072 5428 5870 7744 7846 (Other)
## 3 3 2 2 2 2 2 2 2 7309
summary(df$IMPR_VAL)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0 32975 189440 770077 379400 72251070 9450
df_gg = df %>%
#filter(!is.na(CITY_APPLD_VAL)) %>%
filter(IMPR_VAL < 37794550)
# select(CITY_APPLD_VAL, CNTY_APPLD_VAL, ISD_APPLD_VAL, HOSPITAL_APPLD_VAL, COLLEGE_APPLD_VAL, SPCL_APPLIED_VAL)
summary(factor(df_gg$CITY_APPLD_VAL))
## 1e+05 12000 10000 5000 7500 0 100 60 271960 425000
## 57 36 9 8 7 5 5 3 3 3
## 71088 134000 325000 30 5651 7400 9844 10037 17000 17682
## 2 2 2 1 1 1 1 1 1 1
## 18424 19013 20000 20248 21601 24108 25293 26060 26174 26300
## 1 1 1 1 1 1 1 1 1 1
## 27572 27830 28353 28898 28915 29240 30874 30988 31438 31612
## 1 1 1 1 1 1 1 1 1 1
## 32286 33038 33166 33656 35609 35710 36050 36131 37000 37618
## 1 1 1 1 1 1 1 1 1 1
## 38720 40675 41050 41062 43222 43372 43622 43992 45112 45174
## 1 1 1 1 1 1 1 1 1 1
## 45468 48291 50000 50744 50751 51323 52840 56400 57665 59172
## 1 1 1 1 1 1 1 1 1 1
## 59382 59400 59512 61366 61560 61954 65068 65940 66088 66440
## 1 1 1 1 1 1 1 1 1 1
## 67930 68594 69672 70272 71837 71846 72356 72363 73284 73792
## 1 1 1 1 1 1 1 1 1 1
## 74147 74814 75030 75086 75532 75738 75826 75962 76000 (Other)
## 1 1 1 1 1 1 1 1 1 318
ggplot(df_gg, aes(x=CITY_APPLD_VAL, y = IMPR_VAL, color=CITY_APPLD_VAL, alpha=.5)) +
geom_point()
df_gg = df %>%
filter(!is.na(CITY_APPLD_VAL)) %>%
filter(IMPR_VAL < 1000)
ggplot(df_gg, aes(x=CITY_APPLD_VAL, y = IMPR_VAL, color=CITY_APPLD_VAL, alpha=.5)) +
geom_point()
# account_info
# contains auxiliary data for each appraisal account in Account_Apprl_Year.
df = account_apprl_year %>%
left_join(account_info)
## Joining, by = c("ACCOUNT_NUM", "APPRAISAL_YR", "GIS_PARCEL_ID", "DIVISION_CD")
summary(factor(df$OWNER_CITY))
## DALLAS IRVING GARLAND PLANO
## 2055 67 54 36
## MESQUITE RICHARDSON HOUSTON CARROLLTON
## 31 24 20 19
## CEDAR HILL FARMERS BRANCH ARLINGTON COPPELL
## 17 17 16 15
## DESOTO FRISCO AUSTIN LANCASTER
## 15 15 13 13
## ADDISON LEWISVILLE DUNCANVILLE ROWLETT
## 12 12 10 10
## GRAND PRAIRIE ARGYLE FORT WORTH MCKINNEY
## 9 7 7 7
## MURPHY SACHSE SOUTHLAKE ALLEN
## 6 6 6 5
## FLOWER MOUND SAN DIEGO SUGAR LAND ATHERTON
## 5 5 4 3
## ATLANTA FORNEY LUFKIN ROCKWALL
## 3 3 3 3
## SUNNYVALE WICHITA FALLS WYLIE
## 3 3 3 2
## BALCH SPRINGS CORINTH EULESS FT WORTH
## 2 2 2 2
## KATY LOS ALTOS LOS ANGELES MIDLAND
## 2 2 2 2
## NEW YORK SAN FRANCISCO SPRING THE COLONY
## 2 2 2 2
## TROPHY CLUB WHITTIER ALAMO ARCADIA
## 2 2 1 1
## BELLAIRE BROOKLYN BURLESON BURLINGAME
## 1 1 1 1
## CARLSBAD CLUTE COLLEGE STATION COLLEYVILLE
## 1 1 1 1
## COLUMBIA COMBINE COSTA MESA DENTON
## 1 1 1 1
## EUCLID FATE FLINT FOLSOM
## 1 1 1 1
## GAINESVILLE GLENDALE HIGHLAND VILLAGE HURST
## 1 1 1 1
## KAUFMAN KELLER KEMAH KINGWOOD
## 1 1 1 1
## LAKE DALLAS LINDSAY LITTLE ELM LOCKHART
## 1 1 1 1
## MABLETON MANAKIN SABOT MARSHALL MIDLOTHIAN
## 1 1 1 1
## MISSOURI CITY NEWPORT BEACH NORFOLK OVILLA
## 1 1 1 1
## PALMER PALO ALTO PARKER PLACERVILLE
## 1 1 1 1
## PROSPER QUITMAN (Other) NA's
## 1 1 23 47341
df_gg = df %>%
filter(!is.na(OWNER_CITY)) %>%
filter(IMPR_VAL < 37794550)
# select(APPRAISAL_YR, DIVISION_CD, BIZ_NAME, OWNER_CITY, OWNER_STATE, OWNER_ZIPCODE, OWNER_COUNTRY, PROPERTY_CITY, PROPERTY_ZIPCODE, MAPSCO, NBHD_CD, LEGAL1, LEGAL2, DEED_TXFR_DATE, LMA, IMA)
summary(factor(df$OWNER_CITY))
## DALLAS IRVING GARLAND PLANO
## 2055 67 54 36
## MESQUITE RICHARDSON HOUSTON CARROLLTON
## 31 24 20 19
## CEDAR HILL FARMERS BRANCH ARLINGTON COPPELL
## 17 17 16 15
## DESOTO FRISCO AUSTIN LANCASTER
## 15 15 13 13
## ADDISON LEWISVILLE DUNCANVILLE ROWLETT
## 12 12 10 10
## GRAND PRAIRIE ARGYLE FORT WORTH MCKINNEY
## 9 7 7 7
## MURPHY SACHSE SOUTHLAKE ALLEN
## 6 6 6 5
## FLOWER MOUND SAN DIEGO SUGAR LAND ATHERTON
## 5 5 4 3
## ATLANTA FORNEY LUFKIN ROCKWALL
## 3 3 3 3
## SUNNYVALE WICHITA FALLS WYLIE
## 3 3 3 2
## BALCH SPRINGS CORINTH EULESS FT WORTH
## 2 2 2 2
## KATY LOS ALTOS LOS ANGELES MIDLAND
## 2 2 2 2
## NEW YORK SAN FRANCISCO SPRING THE COLONY
## 2 2 2 2
## TROPHY CLUB WHITTIER ALAMO ARCADIA
## 2 2 1 1
## BELLAIRE BROOKLYN BURLESON BURLINGAME
## 1 1 1 1
## CARLSBAD CLUTE COLLEGE STATION COLLEYVILLE
## 1 1 1 1
## COLUMBIA COMBINE COSTA MESA DENTON
## 1 1 1 1
## EUCLID FATE FLINT FOLSOM
## 1 1 1 1
## GAINESVILLE GLENDALE HIGHLAND VILLAGE HURST
## 1 1 1 1
## KAUFMAN KELLER KEMAH KINGWOOD
## 1 1 1 1
## LAKE DALLAS LINDSAY LITTLE ELM LOCKHART
## 1 1 1 1
## MABLETON MANAKIN SABOT MARSHALL MIDLOTHIAN
## 1 1 1 1
## MISSOURI CITY NEWPORT BEACH NORFOLK OVILLA
## 1 1 1 1
## PALMER PALO ALTO PARKER PLACERVILLE
## 1 1 1 1
## PROSPER QUITMAN (Other) NA's
## 1 1 23 47341
ggplot(df_gg, aes(x=TOT_VAL, y = IMPR_VAL, color=OWNER_CITY, alpha=.5)) +
geom_point()
head(df)
summary(df)
## ACCOUNT_NUM APPRAISAL_YR IMPR_VAL LAND_VAL
## Length:50000 Min. :2020 Min. : 0 Min. : 0
## Class :character 1st Qu.:2020 1st Qu.: 40180 1st Qu.: 21988
## Mode :character Median :2020 Median : 132605 Median : 51355
## Mean :2020 Mean : 855632 Mean : 337330
## 3rd Qu.:2020 3rd Qu.: 321300 3rd Qu.: 232748
## Max. :2020 Max. :897418440 Max. :135493880
##
## LAND_AG_EXEMPT AG_USE_VAL TOT_VAL HMSTD_CAP_VAL
## Min. : 0 Min. : 0.00 Min. : 10 Min. : 10
## 1st Qu.: 0 1st Qu.: 0.00 1st Qu.: 101640 1st Qu.: 97700
## Median : 0 Median : 0.00 Median : 225395 Median : 221030
## Mean : 4326 Mean : 21.22 Mean : 1192962 Mean : 1189414
## 3rd Qu.: 0 3rd Qu.: 0.00 3rd Qu.: 590000 3rd Qu.: 585550
## Max. :25653360 Max. :68944.00 Max. :897418440 Max. :897418440
## NA's :6602 NA's :6602
## REVAL_YR PREV_REVAL_YR PREV_MKT_VAL TOT_CONTRIB_AMT
## Min. :2018 Min. :1000 Min. : 10 Min. : 0
## 1st Qu.:2020 1st Qu.:2018 1st Qu.: 89320 1st Qu.: 0
## Median :2020 Median :2019 Median : 212060 Median : 0
## Mean :2020 Mean :2012 Mean : 1116204 Mean : 19055
## 3rd Qu.:2020 3rd Qu.:2019 3rd Qu.: 565405 3rd Qu.: 0
## Max. :2021 Max. :2021 Max. :835104710 Max. :74280400
## NA's :629
## TAXPAYER_REP CITY_JURIS_DESC COUNTY_JURIS_DESC ISD_JURIS_DESC
## Length:50000 Length:50000 Length:50000 Length:50000
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## HOSPITAL_JURIS_DESC COLLEGE_JURIS_DESC SPECIAL_DIST_JURIS_DESC
## Length:50000 Length:50000 Length:50000
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## CITY_SPLIT_PCT COUNTY_SPLIT_PCT ISD_SPLIT_PCT
## Min. :0.0000000 Min. :0.0000000 Min. :0.0000000
## 1st Qu.:0.0000000 1st Qu.:0.0000000 1st Qu.:0.0000000
## Median :0.0000000 Median :0.0000000 Median :0.0000000
## Mean :0.0005902 Mean :0.0006672 Mean :0.0005902
## 3rd Qu.:0.0000000 3rd Qu.:0.0000000 3rd Qu.:0.0000000
## Max. :1.0000000 Max. :1.0000000 Max. :1.0000000
##
## HOSPITAL_SPLIT_PCT COLLEGE_SPLIT_PCT SPECIAL_DIST_SPLIT_PCT
## Min. :0.0000000 Min. :0.0000000 Min. :0.00e+00
## 1st Qu.:0.0000000 1st Qu.:0.0000000 1st Qu.:0.00e+00
## Median :0.0000000 Median :0.0000000 Median :0.00e+00
## Mean :0.0005902 Mean :0.0006102 Mean :4.64e-05
## 3rd Qu.:0.0000000 3rd Qu.:0.0000000 3rd Qu.:0.00e+00
## Max. :1.0000000 Max. :1.0000000 Max. :1.00e+00
##
## CITY_TAXABLE_VAL COUNTY_TAXABLE_VAL ISD_TAXABLE_VAL
## Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 81853 1st Qu.: 78988 1st Qu.: 81440
## Median : 195040 Median : 191225 Median : 198544
## Mean : 1036565 Mean : 1016846 Mean : 1034477
## 3rd Qu.: 494028 3rd Qu.: 486730 3rd Qu.: 515000
## Max. :897418440 Max. :540000000 Max. :540000000
##
## HOSPITAL_TAXABLE_VAL COLLEGE_TAXABLE_VAL SPECIAL_DIST_TAXABLE_VAL
## Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 78988 1st Qu.: 78710 1st Qu.: 0
## Median : 191235 Median : 191016 Median : 0
## Mean : 1018742 Mean : 1039284 Mean : 41197
## 3rd Qu.: 486934 3rd Qu.: 487155 3rd Qu.: 0
## Max. :540000000 Max. :540000000 Max. :57850000
##
## CITY_CEILING_VALUE COUNTY_CEILING_VALUE ISD_CEILING_VALUE
## Min. : 0 Min. : 0.0 Min. : 0.0
## 1st Qu.: 0 1st Qu.: 105.5 1st Qu.: 838.1
## Median : 0 Median : 373.2 Median : 2541.3
## Mean : 29 Mean : 842.6 Mean : 4944.3
## 3rd Qu.: 0 3rd Qu.: 856.6 3rd Qu.: 5221.5
## Max. :4421 Max. :58832.5 Max. :273345.8
## NA's :46106 NA's :46106 NA's :46106
## HOSPITAL_CEILING_VALUE COLLEGE_CEILING_VALUE SPECIAL_DIST_CEILING_VALUE
## Mode:logical Min. :0 Mode:logical
## NA's:50000 1st Qu.:0 NA's:50000
## Median :0
## Mean :0
## 3rd Qu.:0
## Max. :0
## NA's :46106
## VID_IND GIS_PARCEL_ID APPRAISAL_METH_CD RENDITION_PENALTY
## Length:50000 Length:50000 Min. :1.000 Length:50000
## Class :character Class :character 1st Qu.:1.000 Class :character
## Mode :character Mode :character Median :1.000 Mode :character
## Mean :1.392
## 3rd Qu.:1.000
## Max. :3.000
##
## DIVISION_CD EXTRNL_CNTY_ACCT EXTRNL_CITY_ACCT P_BUS_TYP_CD
## Length:50000 Length:50000 Length:50000 Length:50000
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## BLDG_CLASS_CD SPTD_CODE BIZ_NAME OWNER_NAME1
## Length:50000 Length:50000 Length:50000 Length:50000
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## OWNER_NAME2 EXCLUDE_OWNER OWNER_ADDRESS_LINE1 OWNER_ADDRESS_LINE2
## Mode:logical Length:50000 Length:50000 Length:50000
## NA's:50000 Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## OWNER_ADDRESS_LINE3 OWNER_ADDRESS_LINE4 OWNER_CITY OWNER_STATE
## Mode:logical Mode:logical Length:50000 Length:50000
## NA's:50000 NA's:50000 Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## OWNER_ZIPCODE OWNER_COUNTRY STREET_NUM STREET_HALF_NUM
## Min. : 75152 Length:50000 Min. : 1 Mode:logical
## 1st Qu.:752024038 Class :character 1st Qu.: 1654 NA's:50000
## Median :752143759 Mode :character Median : 2934
## Mean :751503788 Mean : 3492
## 3rd Qu.:752271302 3rd Qu.: 4900
## Max. :956677816 Max. :17328
## NA's :47343 NA's :47341
## FULL_STREET_NAME BLDG_ID UNIT_ID PROPERTY_CITY
## Length:50000 Length:50000 Length:50000 Length:50000
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## PROPERTY_ZIPCODE MAPSCO NBHD_CD LEGAL1
## Min. : 75006 Length:50000 Length:50000 Length:50000
## 1st Qu.:752015508 Class :character Class :character Class :character
## Median :752142319 Mode :character Mode :character Mode :character
## Mean :653610548
## 3rd Qu.:752195409
## Max. :752548544
## NA's :47341
## LEGAL2 LEGAL3 LEGAL4 LEGAL5
## Length:50000 Length:50000 Length:50000 Length:50000
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## DEED_TXFR_DATE PHONE_NUM LMA IMA
## Length:50000 Mode:logical Length:50000 Length:50000
## Class :character NA's:50000 Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
colSums(is.na(df))
## ACCOUNT_NUM APPRAISAL_YR
## 0 0
## IMPR_VAL LAND_VAL
## 0 0
## LAND_AG_EXEMPT AG_USE_VAL
## 6602 6602
## TOT_VAL HMSTD_CAP_VAL
## 0 0
## REVAL_YR PREV_REVAL_YR
## 0 0
## PREV_MKT_VAL TOT_CONTRIB_AMT
## 629 0
## TAXPAYER_REP CITY_JURIS_DESC
## 0 0
## COUNTY_JURIS_DESC ISD_JURIS_DESC
## 0 0
## HOSPITAL_JURIS_DESC COLLEGE_JURIS_DESC
## 0 0
## SPECIAL_DIST_JURIS_DESC CITY_SPLIT_PCT
## 0 0
## COUNTY_SPLIT_PCT ISD_SPLIT_PCT
## 0 0
## HOSPITAL_SPLIT_PCT COLLEGE_SPLIT_PCT
## 0 0
## SPECIAL_DIST_SPLIT_PCT CITY_TAXABLE_VAL
## 0 0
## COUNTY_TAXABLE_VAL ISD_TAXABLE_VAL
## 0 0
## HOSPITAL_TAXABLE_VAL COLLEGE_TAXABLE_VAL
## 0 0
## SPECIAL_DIST_TAXABLE_VAL CITY_CEILING_VALUE
## 0 46106
## COUNTY_CEILING_VALUE ISD_CEILING_VALUE
## 46106 46106
## HOSPITAL_CEILING_VALUE COLLEGE_CEILING_VALUE
## 50000 46106
## SPECIAL_DIST_CEILING_VALUE VID_IND
## 50000 0
## GIS_PARCEL_ID APPRAISAL_METH_CD
## 0 0
## RENDITION_PENALTY DIVISION_CD
## 0 0
## EXTRNL_CNTY_ACCT EXTRNL_CITY_ACCT
## 0 0
## P_BUS_TYP_CD BLDG_CLASS_CD
## 0 0
## SPTD_CODE BIZ_NAME
## 0 47341
## OWNER_NAME1 OWNER_NAME2
## 47341 50000
## EXCLUDE_OWNER OWNER_ADDRESS_LINE1
## 47341 47341
## OWNER_ADDRESS_LINE2 OWNER_ADDRESS_LINE3
## 47341 50000
## OWNER_ADDRESS_LINE4 OWNER_CITY
## 50000 47341
## OWNER_STATE OWNER_ZIPCODE
## 47341 47343
## OWNER_COUNTRY STREET_NUM
## 47341 47341
## STREET_HALF_NUM FULL_STREET_NAME
## 50000 47341
## BLDG_ID UNIT_ID
## 47341 47341
## PROPERTY_CITY PROPERTY_ZIPCODE
## 47341 47341
## MAPSCO NBHD_CD
## 47341 47341
## LEGAL1 LEGAL2
## 47341 47341
## LEGAL3 LEGAL4
## 47341 47341
## LEGAL5 DEED_TXFR_DATE
## 47341 47341
## PHONE_NUM LMA
## 50000 47341
## IMA
## 47341
There aren’t many cities listed. We only want dallas anyways. But there are tons of NA.
df_gg = df %>%
filter(!is.na(PROPERTY_CITY)) %>%
filter(IMPR_VAL < 37794550)
# select(APPRAISAL_YR, DIVISION_CD, BIZ_NAME, OWNER_CITY, OWNER_STATE, OWNER_ZIPCODE, OWNER_COUNTRY, PROPERTY_CITY, PROPERTY_ZIPCODE, MAPSCO, NBHD_CD, LEGAL1, LEGAL2, DEED_TXFR_DATE, LMA, IMA)
summary(factor(df$PROPERTY_CITY))
## ADDISON BALCH SPRINGS CARROLLTON (DALLAS CO)
## 4 10 27
## CEDAR HILL COCKRELL HILL COMBINE
## 21 1 1
## COPPELL (DALLAS CO) DALLAS DESOTO
## 14 2363 13
## DUNCANVILLE FARMERS BRANCH GARLAND (DALLAS CO)
## 10 9 42
## GRAND PRAIRIE (DALLAS CO) HIGHLAND PARK IRVING
## 6 5 62
## LANCASTER MESQUITE (DALLAS CO) NO TOWN
## 7 33 1
## RICHARDSON ROWLETT (DALLAS CO) SACHSE
## 8 9 4
## SUNNYVALE UNIVERSITY PARK WILMER
## 2 6 1
## NA's
## 47341
ggplot(df_gg, aes(x=TOT_VAL, y = IMPR_VAL, color=PROPERTY_CITY, alpha=.5)) +
geom_point()
# see the same for points with <2k improvement value
df_gg = df %>%
filter(!is.na(PROPERTY_CITY)) %>%
filter(IMPR_VAL < 1000)
ggplot(df_gg, aes(x=TOT_VAL, y = IMPR_VAL, color=PROPERTY_CITY, alpha=.5)) +
geom_point()
There aren’t many zipcodes for us to see
df_gg = df %>%
filter(!is.na(PROPERTY_ZIPCODE)) %>%
filter(IMPR_VAL < 37794550)
# select(APPRAISAL_YR, DIVISION_CD, BIZ_NAME, OWNER_CITY, OWNER_STATE, OWNER_ZIPCODE, OWNER_COUNTRY, PROPERTY_CITY, PROPERTY_ZIPCODE, MAPSCO, NBHD_CD, LEGAL1, LEGAL2, DEED_TXFR_DATE, LMA, IMA)
summary(factor(df$PROPERTY_ZIPCODE))
## 75201 75215 75202 75226 75204 75223 75246 752203452
## 68 67 45 35 25 20 13 9
## 75207 75208 75061 752095808 752096255 752261204 75212 75219
## 8 8 7 7 6 6 5 5
## 750061433 752102026 752155125 752195409 752261425 752261512 75203 75234
## 5 5 5 5 5 5 4 4
## 752014702 752055521 752095926 752095927 752096214 752122333 752154640 752154948
## 4 4 4 4 4 4 4 4
## 752155010 752155014 752155043 752155118 752155144 752155262 752194437 752233551
## 4 4 4 4 4 4 4 4
## 752461062 75006 75214 752011254 752013419 752014316 752015508 752015509
## 4 3 3 3 3 3 3 3
## 752015816 752021927 752023335 752043920 752045537 752045541 752045543 752055418
## 3 3 3 3 3 3 3 3
## 752082120 752083955 752093616 752093624 752095913 752095924 752143748 752143801
## 3 3 3 3 3 3 3 3
## 752143817 752143840 752144011 752144013 752144556 752151150 752151244 752151324
## 3 3 3 3 3 3 3 3
## 752154617 752154641 752154939 752154942 752154955 752154958 752155030 752155036
## 3 3 3 3 3 3 3 3
## 752155037 752155119 752155129 752155152 752155338 752155352 752155357 752193816
## 3 3 3 3 3 3 3 3
## 752231163 752231914 752233503 752261215 752261505 752261510 752261608 752261627
## 3 3 3 3 3 3 3 3
## 752261719 752262106 (Other) NA's
## 3 3 2048 47341
ggplot(df_gg, aes(x=TOT_VAL, y = IMPR_VAL, color=PROPERTY_ZIPCODE, alpha=.5)) +
geom_point()
# see the same for points with <2k improvement value
df_gg = df %>%
filter(!is.na(PROPERTY_ZIPCODE)) %>%
filter(IMPR_VAL < 1000)
ggplot(df_gg, aes(x=TOT_VAL, y = IMPR_VAL, color=PROPERTY_ZIPCODE, alpha=.5)) +
geom_point()
Res have lower value than commercial
df_gg = df %>%
filter(!is.na(DIVISION_CD)) %>%
filter(IMPR_VAL < 37794550)
# select(APPRAISAL_YR, DIVISION_CD, BIZ_NAME, OWNER_CITY, OWNER_STATE, OWNER_ZIPCODE, OWNER_COUNTRY, PROPERTY_CITY, PROPERTY_ZIPCODE, MAPSCO, NBHD_CD, LEGAL1, LEGAL2, DEED_TXFR_DATE, LMA, IMA)
summary(factor(df$DIVISION_CD))
## BPP COM RES
## 6410 13233 30357
ggplot(df_gg, aes(x=TOT_VAL, y = IMPR_VAL, color=DIVISION_CD, alpha=.5)) +
geom_point()
# see the same for points with <2k improvement value
df_gg = df %>%
filter(!is.na(DIVISION_CD)) %>%
filter(IMPR_VAL < 1000)
ggplot(df_gg, aes(x=TOT_VAL, y = IMPR_VAL, color=DIVISION_CD, alpha=.5)) +
geom_point()
df_gg = df %>%
filter(!is.na(MAPSCO)) %>%
filter(IMPR_VAL < 37794550)
# select(APPRAISAL_YR, DIVISION_CD, BIZ_NAME, OWNER_CITY, OWNER_STATE, OWNER_ZIPCODE, OWNER_COUNTRY, PROPERTY_CITY, PROPERTY_ZIPCODE, MAPSCO, NBHD_CD, LEGAL1, LEGAL2, DEED_TXFR_DATE, LMA, IMA)
summary(factor(df$MAPSCO))
## 56-G (DALLAS) 56-C (DALLAS) 45-L (DALLAS) 46-F (DALLAS) 56-D (DALLAS)
## 80 72 63 63 62
## 46-J (DALLAS) 45-M (DALLAS) 45-Q (DALLAS) 37-W (DALLAS) 45-U (DALLAS)
## 58 55 54 48 47
## 45-H (DALLAS) 47-J (DALLAS) 34-M (DALLAS) 45-K (DALLAS) 37-S (DALLAS)
## 42 42 40 35 34
## 45-P (DALLAS) 46-K (DALLAS) 45-F (DALLAS) 34-R (DALLAS) 45-D (DALLAS)
## 34 34 31 28 27
## 45-V (DALLAS) 46-B (DALLAS) 34-G (DALLAS) 35-T (DALLAS) 35-S (DALLAS)
## 26 25 24 24 22
## 46-A (DALLAS) 46-N (DALLAS) 46-L (DALLAS) 45-R (DALLAS) 46-C (DALLAS)
## 22 21 20 18 18
## 36-L (DALLAS) 36-V (DALLAS) 36-M (DALLAS) 46-E (DALLAS) 34-L (DALLAS)
## 17 17 16 16 15
## 35-R (DALLAS) 35-X (DALLAS) 36-P (DALLAS) 36-Q (DALLAS) 45-B (DALLAS)
## 15 15 15 15 15
## 36-N (DALLAS) 35-W (DALLAS) 36-K (DALLAS) 45-N (DALLAS) 46-R (DALLAS)
## 14 13 13 13 13
## 47-N (DALLAS) 55-G (DALLAS) 36-S (DALLAS) 36-Z (DALLAS) 45-C (DALLAS)
## 13 13 12 12 12
## 46-V (DALLAS) 47-A (DALLAS) 56-B (DALLAS) 36-H (DALLAS) 36-T (DALLAS)
## 12 12 12 11 11
## 36-W (DALLAS) 36-X (DALLAS) 43-K (DALLAS) 44-X (DALLAS) 45-G (DALLAS)
## 11 11 11 11 11
## 46-X (DALLAS) 35-V (DALLAS) 43-Z (DALLAS) 45-E (DALLAS) 46-H (DALLAS)
## 11 10 10 10 10
## 54-B (DALLAS) 54-G (DALLAS) 36-J (DALLAS) 36-U (DALLAS) 44-T (DALLAS)
## 10 10 9 9 9
## 46-M (DALLAS) 46-T (DALLAS) 46-Y (DALLAS) 54-C (DALLAS) 58-B (DALLAS)
## 9 9 9 9 9
## 2-K (DALLAS) 46-G (DALLAS) 47-E (DALLAS) 48-V (DALLAS) 54-N (DALLAS)
## 8 8 8 8 8
## 23-U (DALLAS) 33-M (DALLAS) 34-V (DALLAS) 44-Z (DALLAS) 45-J (DALLAS)
## 7 7 7 7 7
## 54-F (DALLAS) 34-C (DALLAS) 36-F (DALLAS) 37-T (DALLAS) 37-X (DALLAS)
## 7 6 6 6 6
## 44-P (DALLAS) 44-V (DALLAS) 44-Y (DALLAS) 45-Z (DALLAS) 46-S (DALLAS)
## 6 6 6 6 6
## 46-U (DALLAS) 54-S (DALLAS) 55-J (DALLAS) (Other) NA's
## 6 6 6 796 47341
ggplot(df_gg, aes(x=TOT_VAL, y = IMPR_VAL, color=MAPSCO, alpha=.5)) +
geom_point()
# see the same for points with <2k improvement value
df_gg = df %>%
filter(!is.na(MAPSCO)) %>%
filter(IMPR_VAL < 1000)
ggplot(df_gg, aes(x=TOT_VAL, y = IMPR_VAL, color=MAPSCO, alpha=.5)) +
geom_point()
df_gg = df %>%
filter(!is.na(NBHD_CD)) %>%
filter(IMPR_VAL < 37794550)
# select(APPRAISAL_YR, DIVISION_CD, BIZ_NAME, OWNER_CITY, OWNER_STATE, OWNER_ZIPCODE, OWNER_COUNTRY, PROPERTY_CITY, PROPERTY_ZIPCODE, MAPSCO, NBHD_CD, LEGAL1, LEGAL2, DEED_TXFR_DATE, LMA, IMA)
summary(factor(df$NBHD_CD))
## UNASSIGNED 1DSA07 1DSA01 5DSF01 1DSD12 1DSM12 5DSS06
## 830 175 35 33 32 32 32
## 1DSQ01 1DSA04 1DSK01 1DSA02 1DSJ19 1DSA06 1DSM11
## 31 30 30 27 26 23 22
## 1DSJ11 1DSD03 4DSA8C 1DSJ03 1DSM15 1DSJ1H 1DSD11
## 20 15 15 13 13 12 11
## 1DSG11 4DSA03 1DSJ21 1DSJ22 1DSM13 1DSM17 1DSM19
## 11 11 10 10 10 10 10
## 1DSM21 1DSS04 4DSI04 1DSD04 1DSF01 1DSG08 4DSE13
## 10 10 10 9 9 9 9
## 5DS101 1DSJ06 1DSP10 4DSD44 4DSG15 5DS315 5DSL10
## 9 8 8 8 8 8 8
## 1DSD16 1DSJ12 4DSG08 4DSG49 4DSM05 4WSI26 1DSD17
## 7 7 7 7 7 7 6
## 1DSG06 1DSJ07 1DSJ18 1DSJ24 1DSM09 4DSE11 4DSM06
## 6 6 6 6 6 6 6
## 4DSP01 5DS299 1DS932 1DSB01 1DSJ13 1DSM20 1DSM26
## 6 6 5 5 5 5 5
## 1DSP09 1DSS06 4DSG03 4DSG11 4DSG40 4DSG47 4DSG70
## 5 5 5 5 5 5 5
## 4DSH16 4DSK02 4DSL02 4DSP02 5AS310 5DS120 5DSG03
## 5 5 5 5 5 5 5
## 5DSG04 5DSM08 1DS483 1DS907 1DSG09 1DSM05 1DSP02
## 5 5 4 4 4 4 4
## 1DSV01 2DSM02 4DS209 4DSD79 4DSG38 4DSG45 1DS204
## 4 4 4 4 4 4 3
## 1DS214 1DS500 1DSD08 1DSD15 1DSJ27 1DSM02 1DSM04
## 3 3 3 3 3 3 3
## (Other) NA's
## 732 47341
ggplot(df_gg, aes(x=TOT_VAL, y = IMPR_VAL, color=NBHD_CD, alpha=.5)) +
geom_point()
# see the same for points with <2k improvement value
df_gg = df %>%
filter(!is.na(NBHD_CD)) %>%
filter(IMPR_VAL < 1000)
ggplot(df_gg, aes(x=TOT_VAL, y = IMPR_VAL, color=NBHD_CD, alpha=.5)) +
geom_point()
df_gg = df %>%
filter(!is.na(LEGAL1)) %>%
filter(IMPR_VAL < 37794550)
# select(APPRAISAL_YR, DIVISION_CD, BIZ_NAME, OWNER_CITY, OWNER_STATE, OWNER_ZIPCODE, OWNER_COUNTRY, PROPERTY_CITY, PROPERTY_ZIPCODE, MAPSCO, NBHD_CD, LEGAL1, LEGAL2, DEED_TXFR_DATE, LMA, IMA)
summary(factor(df$LEGAL1))
## LINCOLN MANOR IDEAL
## 66 39
## CROWDUS & AKARD SANTA MONICA
## 31 31
## FAIRVIEW WEBSTER & WOODS NORTH PARK
## 26 25
## RAILROAD BRYAN PLACE REV 2
## 23 22
## SMITH MURPHY & MARTIN ERVAY CEDARS
## 19 17
## OAKLAND PLACE UNIVERSITY PLACE
## 16 15
## VICKERY PLACE COUNTRY CLUB ESTATES
## 14 13
## SKYLINE HEIGHTS REV BEEMAN ESTATES
## 13 12
## CAMPS ALEX ELIZA MCCOYS
## 12 12
## GASTON HOMESTEAD UNREC GASTONWOOD
## 12 12
## GRAVES HEIGHTS LINCOLN MANOR 2
## 12 12
## MOCKINGBIRD PARK COLUMBIA PLACE
## 12 11
## ELITE LAKESHORE HILLS REV
## 11 11
## QUEEN CITY SPRING AVENUE
## 11 11
## CARUTH TERRACE EXPOSITION PARK
## 10 10
## PASADENA PEAKS SUBURBAN
## 10 10
## PIEDMONT WALNUT GROVE
## 10 10
## BROWDERS LAFOY PLACE
## 9 9
## MUNGER PLACE BEVERLY TERRACE
## 9 8
## BROWDER'S PROVIDENCE ERWINS
## 8 8
## GOODS HIGHLAND HILLS
## 8 8
## JUNIUS HEIGHTS LOMAS PARK
## 8 8
## MIDDLETON BROS CENTRAL AVE OZONE
## 8 8
## SNODGRASS TRADEWINDS CONDOS
## 8 8
## TUBERVILLE
## 8 7
## BELMONT SUBURBAN COCKRELL'S
## 7 7
## COLLEGE HILL DALLAS LAND AND LOAN 2
## 7 7
## GASTON TERRACE H T LACKEY
## 7 7
## ORIGINAL TOWN OF DALLAS ROBINSON
## 7 7
## VERSAILLES PARKWAY REVISED ALTA VISTA
## 7 6
## BLK 217 BLK 331 1/2
## 6 6
## BLK 801 BURK & SLAUGHTER
## 6 6
## C W GUILDS PARK AVENUE CHAPEL CREEK REV
## 6 6
## DALLAS LAND AND LOAN 3 DOLPHIN HEIGHTS
## 6 6
## E H RAYS EAST WE GO
## 6 6
## EASTON CONDOMINIUMS GASTONWOOD REV
## 6 6
## GREENLAND HILLS HILL TOP SECTION HASKELL PLACE
## 6 6
## INDUSTRIAL IMPROVEMENT PROJECT J A JOHNSTONS
## 6 6
## J BOPPS HOMESTEAD J J EAKINS 2ND
## 6 6
## KESSLER SQUARE MCCOYS
## 6 6
## MONTICELLO OAK CLIFF ORIGINAL
## 6 6
## T J RODGERS 1100 TRINITY MILLS CONDO
## 6 5
## BERGFELD PLACE BLANKENSHIP & BLAKE'S COTTON MILL
## 5 5
## BLK 2518 BOB-O-LINKS DOWNS
## 5 5
## BOWSER & LEMMONS OAK LAWN DALLAS & WICHITA RAILROAD
## 5 5
## EAST COLUMBIA PLACE EDGEFIELD HEIGHTS
## 5 5
## ELLINGTON PARK GREENVILLE CREST ANNEX
## 5 5
## HERRLING F C KESSLER HIGHLANDS
## 5 5
## MONTEBELLO PARK RENAISSANCE ON TURTLE CREEK CONDO
## 5 5
## (Other) NA's
## 1666 47341
ggplot(df_gg, aes(x=TOT_VAL, y = IMPR_VAL, color=LEGAL1, alpha=.5)) +
geom_point()
# see the same for points with <2k improvement value
df_gg = df %>%
filter(!is.na(LEGAL1)) %>%
filter(IMPR_VAL < 1000)
ggplot(df_gg, aes(x=TOT_VAL, y = IMPR_VAL, color=LEGAL1, alpha=.5)) +
geom_point()
df_gg = df %>%
filter(!is.na(LMA)) %>%
filter(IMPR_VAL < 37794550)
# select(APPRAISAL_YR, DIVISION_CD, BIZ_NAME, OWNER_CITY, OWNER_STATE, OWNER_ZIPCODE, OWNER_COUNTRY, PROPERTY_CITY, PROPERTY_ZIPCODE, MAPSCO, NBHD_CD, LEGAL1, LEGAL2, DEED_TXFR_DATE, LMA, IMA)
summary(factor(df$LMA))
## 1AS001 1ASVR1 1CA002 1DSC02 1DSCE7 1DSCE8 1DSCE9
## 1 1 1 1 1 1 10
## 1DSCEX 1DSD04 1DSDE1 1DSE05 1DSF05 1DSF06 1DSF4A
## 15 1 91 1 1 1 1
## 1DSG07 1DSG12 1DSG16 1DSH06 1DSH07 1DSH16 1DSH6B
## 1 1 1 5 1 2 3
## 1DSH9A 1DSH9B 1DSH9C 1DSI06 1DSI07 1DSI09 1DSI10
## 3 2 1 1 1 4 29
## 1DSI11 1DSI12 1DSI5A 1DSI7A 1DSJ06 1DSJ09 1DSJ10
## 4 5 3 7 3 23 88
## 1DSJ11 1DSJ6A 1DSJ7A 1DSJ7B 1DSJ7C 1DSJ7D 1DSJ7E
## 8 8 37 27 4 10 20
## 1DSJ7F 1DSJ7G 1DSK04 1DSK07 1DSK6A 1DSK7A 1DSOL1
## 38 44 1 12 9 8 9
## 1DSOL2 1DSOL3 1DSOL4 1DSOL5 1DSOL6 1DSOL7 1DSOL8
## 3 2 2 5 20 18 16
## 1ISA08 1ISB05 1ISB08 1ISD08 1OS001 1OS002 2DSK08
## 3 1 1 1 1 1 10
## 2DSK09 2DSK10 2DSK11 2DSK12 2DSK13 2DSK14 2DSK15
## 47 20 10 1 9 4 3
## 2DSK16 2DSK17 2DSK21 2DSK9A 2DSL06 2DSL08 2DSL09
## 10 1 1 45 1 1 2
## 2DSL10 2DSL11 2DSL4A 2DSL6A 2DSL6B 2DSL7A 2DSM05
## 1 2 1 4 9 6 1
## 2DSM08 2DSM12 2DSM6A 2DSN11 2DSP6A 2DSQ04 2DSR14
## 1 1 1 1 1 1 2
## 2PS007 3GSG03 3MSA02 3MSC02 3MSD03 3MSF01 3MSG02
## 2 1 1 1 1 1 2
## UNASSIGNED NA's
## 1829 47341
ggplot(df_gg, aes(x=TOT_VAL, y = IMPR_VAL, color=LMA, alpha=.5)) +
geom_point()
# see the same for points with <2k improvement value
df_gg = df %>%
filter(!is.na(LMA)) %>%
filter(IMPR_VAL < 1000)
ggplot(df_gg, aes(x=TOT_VAL, y = IMPR_VAL, color=LMA, alpha=.5)) +
geom_point()
THERE IS NOTHING TO SEE IN IMA
df_gg = df %>%
filter(!is.na(IMA)) %>%
filter(IMPR_VAL < 37794550)
# select(APPRAISAL_YR, DIVISION_CD, BIZ_NAME, OWNER_CITY, OWNER_STATE, OWNER_ZIPCODE, OWNER_COUNTRY, PROPERTY_CITY, PROPERTY_ZIPCODE, MAPSCO, NBHD_CD, LEGAL1, LEGAL2, DEED_TXFR_DATE, LMA, IMA)
summary(factor(df$IMA))
## 100100 100300 100800 110100 110400 111100 211800
## 5 1 2 12 2 1 5
## 212300 360100 3602OL 360500 360600 360800 360900
## 1 15 2 2 4 3 15
## 3609DE 540100 540200 540300 540800 541500 541700
## 18 46 10 10 1 2 1
## 660900 720100 720200 720400 720500 720800 720900
## 2 3 3 1 5 2 7
## UNASSIGNED NA's
## 2478 47341
ggplot(df_gg, aes(x=TOT_VAL, y = IMPR_VAL, color=IMA, alpha=.5)) +
geom_point()
# see the same for points with <2k improvement value
df_gg = df %>%
filter(!is.na(IMA)) %>%
filter(IMPR_VAL < 1000)
ggplot(df_gg, aes(x=TOT_VAL, y = IMPR_VAL, color=IMA, alpha=.5)) +
geom_point()
df_gg = df %>%
filter(!is.na(IMA)) %>%
filter(IMPR_VAL < 1000)
# select(APPRAISAL_YR, DIVISION_CD, BIZ_NAME, OWNER_CITY, OWNER_STATE, OWNER_ZIPCODE, OWNER_COUNTRY, PROPERTY_CITY, PROPERTY_ZIPCODE, MAPSCO, NBHD_CD, LEGAL1, LEGAL2, DEED_TXFR_DATE, LMA, IMA)
summary(factor(df$IMA))
## 100100 100300 100800 110100 110400 111100 211800
## 5 1 2 12 2 1 5
## 212300 360100 3602OL 360500 360600 360800 360900
## 1 15 2 2 4 3 15
## 3609DE 540100 540200 540300 540800 541500 541700
## 18 46 10 10 1 2 1
## 660900 720100 720200 720400 720500 720800 720900
## 2 3 3 1 5 2 7
## UNASSIGNED NA's
## 2478 47341
ggplot(df_gg, aes(x=TOT_VAL, y = IMPR_VAL, color=IMA, alpha=.5)) +
geom_point()
almost all the columns highly correlate with tot_val and are very right skewed
# Abatement exempt
# about properties for which abatements have been granted
# head(abatement_exempt)
# # df = abatement_exempt
# columns_list = list(apprl_yr = abatement_exempt$APPRAISAL_YR,
# tot_val = abatement_exempt$TOT_VAL,
# city_eff_yr = abatement_exempt$CITY_EFF_YR,
# city_val_dif = abatement_exempt$CITY_VAL_DIF,
# cnty_val_dif = abatement_exempt$CNTY_VAL_DIF,
# isd_val_dif = abatement_exempt$ISD_VAL_DIF,
# coll_val_dif = abatement_exempt$COLL_VAL_DIF,
# spec_exemption_pct = abatement_exempt$SPEC_EXEMPTION_PCT,
# spec_val_dif = abatement_exempt$SPEC_VAL_DIF,
# spec_exemption_amt = abatement_exempt$SPEC_EXEMPTION_AMT)
# i = 1
# for(column in columns_list){
# name = names(columns_list)[i]
# plot(density(x=column), main = paste("Density Plot of", name))
#
# plot(x=abatement_exempt$TOT_VAL, y = column, main = paste("Scatterplot of tot_val vs", name))
# i = i+1
# }
#
#Account_apprl_year
# primary or central table in this dataset, it contains summary information about all appraisal accounts.
#head(account_apprl_year)
#df = account_apprl_year
# columns_list = list(impr_val = df$IMPR_VAL,
# land_val = df$LAND_VAL,
# tot_val = df$TOT_VAL,
# hmstd_cap_val = df$HMSTD_CAP_VAL,
# prev_reval_yr = df$PREV_REVAL_YR,
# prev_mkt_val = df$PREV_MKT_VAL,
# city_taxable_val = df$CITY_TAXABLE_VAL,
# county_taxable_val = df$COUNTY_TAXABLE_VAL,
# isd_taxable_val = df$ISD_TAXABLE_VAL,
# hospital_taxable_val = df$HOSPITAL_TAXABLE_VAL,
# college_taxable_val = df$COLLEGE_TAXABLE_VAL,
# county_ceiling_value = df$COUNTY_CEILING_VALUE,
# isd_ceiling_value = df$ISD_CEILING_VALUE,
# appraisal_meth_cd = df$APPRAISAL_METH_CD
# )
#
# i = 1
# for(column in columns_list){
# name = names(columns_list)[i]
# gg_df = data.frame(x = column, tot_val = df$TOT_VAL)
# #print(head(gg_df))
# print(ggplot(gg_df, aes(x=x, y=tot_val)) +
# geom_point() +
# ggtitle(paste("Scatterplot of", name)))
#
# #plot(density(x=column), main = paste("Density Plot of", name))
#
# #plot(x=df$TOT_VAL, y = column, main = paste("Scatterplot of tot_val vs", name))
# i = i+1
# }
summary(account_info)
## ACCOUNT_NUM APPRAISAL_YR DIVISION_CD BIZ_NAME
## Length:50000 Min. :2020 Length:50000 Length:50000
## Class :character 1st Qu.:2020 Class :character Class :character
## Mode :character Median :2020 Mode :character Mode :character
## Mean :2020
## 3rd Qu.:2020
## Max. :2020
##
## OWNER_NAME1 OWNER_NAME2 EXCLUDE_OWNER OWNER_ADDRESS_LINE1
## Length:50000 Mode:logical Length:50000 Length:50000
## Class :character NA's:50000 Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## OWNER_ADDRESS_LINE2 OWNER_ADDRESS_LINE3 OWNER_ADDRESS_LINE4 OWNER_CITY
## Length:50000 Mode:logical Mode:logical Length:50000
## Class :character NA's:50000 NA's:50000 Class :character
## Mode :character Mode :character
##
##
##
##
## OWNER_STATE OWNER_ZIPCODE OWNER_COUNTRY STREET_NUM
## Length:50000 Min. : 75050 Length:50000 Min. : 1
## Class :character 1st Qu.:751343039 Class :character 1st Qu.: 1412
## Mode :character Median :752122132 Mode :character Median : 2909
## Mean :751968379 Mean : 3616
## 3rd Qu.:752232123 3rd Qu.: 4859
## Max. :997016578 Max. :900009
## NA's :66
## STREET_HALF_NUM FULL_STREET_NAME BLDG_ID UNIT_ID
## Mode:logical Length:50000 Length:50000 Length:50000
## NA's:50000 Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## PROPERTY_CITY PROPERTY_ZIPCODE MAPSCO NBHD_CD
## Length:50000 Min. : 75001 Length:50000 Length:50000
## Class :character 1st Qu.:751155736 Class :character Class :character
## Mode :character Median :752116421 Mode :character Mode :character
## Mean :706756744
## 3rd Qu.:752194206
## Max. :760512709
## NA's :3
## LEGAL1 LEGAL2 LEGAL3 LEGAL4
## Length:50000 Length:50000 Length:50000 Length:50000
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## LEGAL5 DEED_TXFR_DATE GIS_PARCEL_ID PHONE_NUM
## Length:50000 Length:50000 Length:50000 Mode:logical
## Class :character Class :character Class :character NA's:50000
## Mode :character Mode :character Mode :character
##
##
##
##
## LMA IMA
## Length:50000 Length:50000
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
# Loop
# i = 1
# for(column in names(df)){
# print(column)
# name = column
# print(length(df[[column]]))
# print(length(df$OWNER_CITY))
# gg_df = data.frame(x = df[[column]], OWNER_CITY = df$OWNER_CITY)
#
# print(head(gg_df))
# print(ggplot(gg_df, aes(x=x, y = ..count.., fill=x)) +
# geom_bar())
# #print(ggplot(gg_df, aes(x=x, y= ..count..)) +
# # geom_bar(aes(fill = OWNER_CITY), position = "dodge") +
# # ggtitle(paste("Scatterplot of", name)))
#
# #plot(density(x=column), main = paste("Density Plot of", name))
#
# #plot(x=df$TOT_VAL, y = column, main = paste("Scatterplot of tot_val vs", name))
# i = i+1
# }
# account_tif
# contains information about appraisal accounts in Tax Increment Finance districts.
# acct_exempt_value
# contains the value of each exemption granted
# applied_std_exempt
# contains information about standard exemptions granted.
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.